rm(list = ls(all.names = TRUE))
gc()
set.seed(211917)
options(scipen=999)

packages <-c("tidyverse","foreign","stargazer","naniar","ri2")

new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

lapply(packages, require, character.only = TRUE)
rm(packages, new.packages)

setwd("/Users/hagerans/Dropbox/Research/Kenya 2014/5. Deontologic Radicals/Deontology paper/Replication/replication/")

# load data
dat <- read.dta("./data.dta")

#---------------#
# Data cleaning #
#---------------#

dat$q6_years_married[dat$q6_years_married == -9] <- 0
dat$treatment_binary <- ifelse(dat$holy_endorser==1 | dat$secular_endorser==1, 0, 1)

## select all possible covariates and outcome at first
dat <- dat %>% 
  select(q30_combined, treatment_binary, treatment, holy_endorser, secular_endorser, respondent_muslim, respondent_male, enumerator_muslim, enumerator_male, q1_age, q2_male, q3_kamba, q3_kikuyu, q3_luo, q3_somali, q3_other, q4_religion_christian, q4_religion_muslim, q5_convert, q6_years_married, q6_married, q7_has_job, q8_income_last_month, q8_has_income, q9_chance_become_rich, q10_plans_to_vote, q11_govt_represents_interest, q12_feels_on_loosing_side, q13_a_knows_emigrant, q13_b_knows_emigrant_som_eri, q15_witnesses_interrel_violence, q16_witnesses_viol_musl_govt, q17_friend_family_died, q18_lost_job, q18_arrested, q18_house_of_worship_raided, q18_stopped_speaking_parents, q18_love_problems, q18_friend_left_country, q18_sum, q19_rlts_mother, q19_not_raised_by_mom, q19_mother_dead, q20_rlts_father, q20_not_raised_by_father, q20_father_dead, q21_respect_friends_family, q22_gender, q22_religion, q22_tribal, q22_national, q22_youth, q23_religious_attendance, q23_frequents_house_worship, q24_deontology, q25_payment_acceptable, q26_payment_ethical, q27_radicalization, q33_distance)

## check for missings
vis_miss(dat)

## following variables seems problematic, i.e., shouldn't impute at this level
table(is.na(dat$q13_a_knows_emigrant))
table(is.na(dat$q19_rlts_mother))
table(is.na(dat$q20_rlts_father))
table(is.na(dat$q23_religious_attendance))

## remove problematic vars
dat <- dat %>% select(-q13_a_knows_emigrant, -q19_rlts_mother, -q20_rlts_father, -q23_religious_attendance)

## check missing again:
vis_miss(dat) 

## remove one missing treatment indicator (can't be imputed)
dat <- dat[which(dat$holy_endorser>-1),]

## impute rest of missings with mean 
for(i in 1:ncol(dat)){
  dat[is.na(dat[,i]), i] <- mean(dat[,i], na.rm = TRUE)
}
rm(i)

## to be save, remove missing cases
dat <- dat[complete.cases(dat), ]

killing_d <- dat[which(dat$treatment==1),]
suicide_d <- dat[which(dat$treatment==2),]

#-------#
# table #
#-------#


balance <- function(data, sources, vars){
  
  loop_source <- function(x){
    data_i <- data[which(data[[sources[x]]] == 1),]
    data_i <- data_i[,vars]
    
    data_i <- data_i%>%
      summarise(across(everything(), list(mean = mean, sd = sd)))
    
    
    meancol <- grep("_mean", colnames(data_i))
    sdcol <- grep("_sd", colnames(data_i))
    
    meancol <- t(data_i[,meancol])
    sdcol <- t(data_i[,sdcol])
    
    names <- c(paste("mean",sources[x],sep = "_"), paste("sd",sources[x],sep = "_"))
    
    data_i <- cbind(meancol,sdcol)
    colnames(data_i) <- names
    return(data_i)
  }
  out <- purrr::map(1:length(sources), ~loop_source(.x))
  out <- as.data.frame(do.call(cbind,out))
  colnames(out) <- paste(deparse(substitute(data)), colnames(out), sep = "_")
  return(out)
}


vars <- c("q2_male","respondent_muslim","q1_age","q5_convert","q6_married","q6_years_married",
          "q7_has_job","q8_has_income","q8_income_last_month","q3_kamba","q3_kikuyu","q3_luo",
          "q3_somali","enumerator_muslim","enumerator_male")

samp_killing <- balance(data = killing_d, sources = c("holy_endorser","secular_endorser","treatment_binary"), vars = vars)

samp_suicide <- balance(data = suicide_d, sources = c("holy_endorser","secular_endorser","treatment_binary"), vars = vars)

table <- as.data.frame(cbind(samp_killing,samp_suicide))

xtable(table)
